home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Celestin Apprentice 5
/
Apprentice-Release5.iso
/
Source Code
/
Libraries
/
Advanced I⁄O v2.3
/
Advanced i⁄o
/
arithm_modadh.cc
< prev
next >
Wrap
Text File
|
1995-07-08
|
10KB
|
328 lines
// This may look like C code, but it is really -*- C++ -*-
/*
************************************************************************
*
* Arithmetic Coding
* Adaptive Histogram-based model for the data source
*
*
* The present file provides the functionality for both building the
* histogram for a sequence of integers items and supplying the
* arithmetic codec with probabilities of the items estimated from
* the histogram.
*
* The program uses the histogram to find out which symbols shall really
* come out and how frequent. It allocates frequency tables only for
* the symbols that really rather than potentially need to be coded.
* In all other respects (assuming Lorentzian-like initial probability
* distribution and tailoring as more symbols become known) the
* model behaves way the same as the simple AdaptiveModel does.
*
* $Id: arithm_modadh.cc,v 2.0 1995/02/07 19:37:03 oleg Exp oleg $
*
************************************************************************
*/
#ifdef __GNUC__
#pragma implementation "arithm_modadh.h"
#endif
#include "arithm_modadh.h"
#include "std.h"
/*
*========================================================================
* Histogram-based Adaptive model for the data source
*/
/*
*------------------------------------------------------------------------
* Constructors and Destructors
*/
// Constructor
AdaptiveHistModel::AdaptiveHistModel(const Histogram& histogram)
: no_distinct_symbols(histogram.no_distinct_symbols()),
update_inc(0)
{
// Skip leading and tailing dummy symbols
// (never occured in the histogram)
for(symbol_lwb=histogram.symbol_lwb;
symbol_lwb < histogram.symbol_upb && histogram.get(symbol_lwb) == 0;
symbol_lwb++)
;
for(symbol_upb=histogram.symbol_upb;
symbol_upb > symbol_lwb && histogram.get(symbol_upb) == 0;
symbol_upb--)
;
assure( symbol_upb > symbol_lwb,
"there should be at least 2 symbols to code" );
no_potential_symbols = symbol_upb - symbol_lwb + 1;
assert( no_potential_symbols >= no_distinct_symbols );
allocate_model(no_distinct_symbols);
symbol_to_index = new Index[no_potential_symbols];
index_to_symbol = new Symbol[no_distinct_symbols];
initialize_model(histogram);
}
// Constructor for a dummy model. The real
// model contents is supposed to be read
// in later
AdaptiveHistModel::AdaptiveHistModel(void)
: symbol_lwb(0), symbol_upb(0),
no_potential_symbols(0),
no_distinct_symbols(0),
update_inc(0)
{
symbol_to_index = 0;
index_to_symbol = 0;
}
// Destructor
AdaptiveHistModel::~AdaptiveHistModel(void)
{
assert( symbol_to_index != 0 && index_to_symbol != 0 );
delete [] symbol_to_index;
delete [] index_to_symbol;
}
/*
*------------------------------------------------------------------------
* Sort the histogram and set up the model
*/
static struct Hist_elem
{ unsigned int count; Symbol symbol; }
* Hist_sorted;
// Comparison routine to sort the Histogram
// in the descending order
static int hist_compar(const void * eli, const void * elj)
{
if( ((const Hist_elem*)eli)->count < ((const Hist_elem*)elj)->count )
return 1;
else if( ((const Hist_elem*)eli)->count == ((const Hist_elem*)elj)->count )
return 0;
else
return -1;
}
void AdaptiveHistModel::initialize_model(const Histogram& histogram)
{
Hist_sorted = new Hist_elem[no_distinct_symbols];
register Symbol symbol;
register int i;
for(symbol=histogram.symbol_lwb,i=0; symbol<=histogram.symbol_upb; symbol++)
{
unsigned int count = histogram.get(symbol);
if( count != 0 )
Hist_sorted[i].count = count, Hist_sorted[i].symbol = symbol, i++;
}
assert( i == no_distinct_symbols );
qsort(Hist_sorted,no_distinct_symbols,sizeof(Hist_sorted[0]),hist_compar);
memset(symbol_to_index,0,sizeof(symbol_to_index[0])*no_potential_symbols);
for(i=0; i<no_distinct_symbols; i++)
{
Index index = i+1;
Symbol symbol = Hist_sorted[i].symbol;
assert( symbol <= symbol_upb && symbol >= symbol_lwb );
symbol_to_index[symbol-symbol_lwb] = index;
index_to_symbol[index-1] = symbol;
}
assert( i<EOF_index );
initial_distribution();
}
// Assign initial frequency counts
// according to the Lorentzian ditribution
void AdaptiveHistModel::initial_distribution(void)
{
register int i;
cum_frequencies[no_indices] = 0;
// update_inc > 1 means the real
// occurence of a symbol means more
update_inc = no_indices/10 + 1; // than just a potential occurence.
// Since all frequencies must be >0,
// frequency count one can be regarded
// as being "infinitely small"
// The folowing distribution is
// certaily ad hoc, but seems to work
for(i=no_indices; i>0; i--)
{
frequencies[i] = ( i <= 2 ? 45*update_inc-30 :
i <= 12 ? 3*update_inc : 1 );
cum_frequencies[i-1] = cum_frequencies[i] + frequencies[i];
}
frequencies[0] = 0; // Must be zero (an impossible value)
}
/*
*------------------------------------------------------------------------
* Searching for index/symbol
*/
// Return the index of a given symbol
Index AdaptiveHistModel::get_index(const Symbol symbol) const
{
if( symbol < symbol_lwb || symbol > symbol_upb )
_error("Symbol %d is out of interval [%d,%d]",
symbol, symbol_lwb, symbol_upb);
Index index = symbol_to_index[symbol-symbol_lwb];
if( index == 0 )
_error("\nSymbol %d has not been counted when Histogram was constructed",
symbol);
return index;
}
// Return the symbol for a given index
Symbol AdaptiveHistModel::get_symbol(const Index index) const
{
if( index < 1 || index > no_indices || index == EOF_index )
_error("Index %d is out of boundaries [1,%d)",index,no_indices);
return index_to_symbol[index-1];
}
/*
*------------------------------------------------------------------------
* Input/Output for the frequency tables
* As a matter of fact, only index_to_symbol[] table along with its size
* needs to be saved. All other tables can easily be reconstructed from that.
*
* Thus, the format the tables are written is follows
* short no_distinct_symbols
* short index_to_symbol[0..no_distinct_symbols-1]
* The tables entries are coded using a variable bit size algorithm,
* see BitIn/BitOut for more details.
*/
void AdaptiveHistModel::open(BitOut& file)
{
file.write_short(no_distinct_symbols);
for(register int i=0; i<no_distinct_symbols; i++)
file.put_short(index_to_symbol[i]);
}
// Read in the tables and set up
// the model
void AdaptiveHistModel::open(BitIn& file)
{
no_distinct_symbols = file.read_short("Reading AdaptiveHistModel tables");
allocate_model(no_distinct_symbols);
index_to_symbol = new Symbol[no_distinct_symbols];
for(register int i=0; i<no_distinct_symbols; i++)
{
const Symbol symbol = file.get_short();
if( i== 0 )
symbol_lwb = symbol_upb = symbol;
else
symbol_lwb = symbol_lwb > symbol ? symbol : symbol_lwb,
symbol_upb = symbol_upb < symbol ? symbol : symbol_upb;
index_to_symbol[i] = symbol;
}
no_potential_symbols = symbol_upb - symbol_lwb + 1;
assert( no_potential_symbols > 1 );
assert( no_distinct_symbols <= no_potential_symbols );
symbol_to_index = new Index[no_potential_symbols];
memset(symbol_to_index,0,sizeof(symbol_to_index[0])*no_potential_symbols);
for(register Index index=1; index<=no_distinct_symbols; index++)
{
Symbol symbol = index_to_symbol[index-1];
symbol_to_index[symbol-symbol_lwb] = index;
}
initial_distribution();
}
/*
*------------------------------------------------------------------------
* Update the adaptive model
* to account for occurence of a particular index
*/
// Scale the accumulated statistics down
// in anticipation of a change, or simply to
// keep counters from overflow
void AdaptiveHistModel::scale_down_past(void)
{
register int cum = 0; // If so, halve all the counts
for(register int i=no_indices; i>=0; i--)
{ // keeping them nonzero, and
cum_frequencies[i] = cum; // recompute the cumulative counts
cum += (frequencies[i] = (frequencies[i]+1) >> 1);
}
}
void AdaptiveHistModel::update_model(const Index index)
{
// See if freq counts near their max
if( total_cum_freq() >= (Top_freq>>1) ) // /2 makes rescaling work more often
scale_down_past();
// Tentatively increment the freq
// count for index, just an estimate
const Lword tent_freq = frequencies[index] +
(update_inc > 4 ? update_inc : 1);
// Try to pull 'index' to the beginning
// of the freq. table, skipping symbols
// with the lower freq.
register Lword * fp = &frequencies[index];
while( *--fp != 0 && *fp <= tent_freq ) // Note, freq[0] is always 0
;
// Find a new position for the index
const Index new_index = ++fp - frequencies;
if( new_index < index ) // If the index is to move, update
{ // the translation tables
Symbol symbol_oind = index_to_symbol[index-1];
Symbol symbol_nind = index_to_symbol[new_index-1];
index_to_symbol[index-1] = symbol_nind;
index_to_symbol[new_index-1] = symbol_oind;
symbol_to_index[symbol_oind-symbol_lwb] = new_index;
symbol_to_index[symbol_nind-symbol_lwb] = index;
// Note, we've just swapped index
// and new_index. If their frequencies
// differ, we need to update the
// frequecies and cumulative freqs
const Lword old_freq = frequencies[index];
const Lword new_freq = frequencies[new_index];
if( old_freq != new_freq )
{
frequencies[index] = new_freq;
frequencies[new_index] = old_freq;
const long int delta = new_freq - old_freq;
for(register Lword * cfp = &cum_frequencies[new_index];
cfp < &cum_frequencies[index];)
*cfp++ += delta;
}
}
frequencies[new_index] += update_inc; // Increment the freq count for index
register Lword * cfp; // And update the cumulative counts
for(cfp=&cum_frequencies[new_index]; cfp>&cum_frequencies[0]; )
*--cfp += update_inc;
#if 0
{
message("index %d, new_index %d, update_inc %d\n",index,new_index,update_inc);
for(register Index i=0; i<no_indices; i++)
message("%d ",frequencies[i]);
message("\n");
}
#endif
}